---
title: "Untitled"
author: "Renae L. Shrum"
output: word_document
---
# Old Faithful Data Analysis
## From R's dataset `faithful`

```{r,comment=""}
# Load the table-formatting package before anything uses it.
library(stargazer)

# Make the built-in Old Faithful data set (`faithful`) available.
data(faithful)

# NOTE(review): attach() is discouraged because it places the columns on the
# search path, where they can be silently masked. It is kept here because
# other chunks in this document may refer to `waiting` and `eruptions`
# without qualifying them.
attach(faithful)

# Plain-text summary table of the data set. `flip = TRUE` (spelled out rather
# than the reassignable alias `T`) asks stargazer to flip the table's axes.
stargazer(
  faithful,
  title = "Summary Statistics for Old Faithful",
  type = "text",
  flip = TRUE
)
```
### Summary Statistics
```{r}
# Per-column summary statistics for the faithful data frame.
summary(faithful)
```
### Summary Statistics (without showing the code and '##')
```{r,echo=FALSE,comment=""}
# Same summary call as the previous chunk; only the chunk options differ.
summary(faithful)
```
### Graphs of eruptions and waiting:

```{r, echo=FALSE}
# Histograms and boxplots for both variables. with() evaluates the calls
# inside `faithful`, so this chunk does not depend on the data frame having
# been attach()ed, and hist() still labels its x-axis with the bare column
# name.
with(faithful, {
  hist(waiting, main = "Histogram of Waiting")
  hist(eruptions, main = "Histogram of Eruptions")
  boxplot(eruptions, main = "Boxplot of eruptions")
  boxplot(waiting, main = "Boxplot of waiting")
})
```

### Regression model x=eruptions y=waiting
```{r,comment=""}
# car supplies durbinWatsonTest(), qqPlot() and influencePlot() used below
# (install the package first if it is missing).
library(car)

# Model ----
# Simple linear regression of waiting time on eruption duration. Passing
# `data = faithful` keeps the fit independent of any attach()ed columns.
faith.fit <- lm(waiting ~ eruptions, data = faithful)
summary(faith.fit)

# Diagnostic quantities ----
res <- residuals(faith.fit)
pred <- fitted.values(faith.fit)
n_obs <- length(res)               # number of observations in the fit
n_coef <- length(coef(faith.fit))  # number of estimated coefficients

# Residuals centred on zero ----
hist(res)

# Homogeneous (constant) variance ----
# Residuals against fitted values, with a horizontal reference line at zero.
plot(pred, res, pch = 17, main = "Predicted vs. Residuals")
abline(h = 0)

# Independence of residuals ----
# Residuals in observation order. seq_along() is safe for empty input and the
# index is no longer stored in a variable named `order`, which shadowed
# base::order(). The x-axis label is kept as "order".
obs_index <- seq_along(res)
plot(obs_index, res, type = "l", xlab = "order")
abline(h = 0)

# Another way to check independence of residuals: Durbin-Watson test.
durbinWatsonTest(faith.fit)

# Normality ----
# Normal probability (QQ) plot of the raw residuals.
qqnorm(res)
qqline(res)

# car's QQ plot for the fitted model, with a simulated envelope.
qqPlot(faith.fit, simulate = TRUE)

# Leverage ----
# Hat values, with a reference line at 2 * (number of coefficients) / n.
plot(hatvalues(faith.fit))
abline(h = 2 * n_coef / n_obs, col = 2)

# Influence ----
# Cook's distance measures how much the regression coefficients move when the
# i-th observation is present versus absent; the reference line is drawn at
# 4 / (n - number of coefficients).
plot(cooks.distance(faith.fit))
abline(h = 4 / (n_obs - n_coef), col = 2)

# Influence plot from car: combines studentized residuals, hat values and
# Cook's distances (circle areas correspond to Cook's distances). The x-axis
# is limited to 1.5 times the hat-value reference line used above.
bound <- 1.5 * (2 * n_coef / n_obs)
influencePlot(faith.fit, xlim = c(0, bound), ylim = c(-5, 5))
```
### Hypothesis test: $H_0: \mu_{waiting} = 68$ vs. $H_a: \mu_{waiting} \neq 68$
```{r,comment=""}
# Two-sided one-sample t-test of H0: mean waiting time = 68. with() looks
# `waiting` up inside `faithful`, so no attach() is needed, and the printed
# result still reports the data as "waiting".
with(faithful, t.test(waiting, mu = 68, alternative = "two.sided"))
```